INDI Day8 (New, Opera) - Preprocessing QC statistics ¶

July 2025 - Nancy Y¶

Reran on Sep 15 (2025) by Sagy - removing batch10 and WT stress (since we are using NIH for WT untreated vs WT stress) and CD41

In [6]:
import os
import sys

NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
NOVA_DATA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
os.environ['NOVA_HOME'] = NOVA_HOME
sys.path.insert(1, os.getenv("NOVA_HOME"))
print(f"NOVA_HOME: {os.getenv('NOVA_HOME')}")

root_directory_raw = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'raw', 'OPERA_indi_sorted')
root_directory_proc = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'processed', 'ManuscriptFinalData_80pct','neuronsDay8_new')

LOGS_PATH = os.path.join(NOVA_HOME, "outputs", "preprocessing", "ManuscriptFinalData_80pct", "neuronsDay8_new", "logs")
PLOT_PATH = os.path.join(NOVA_HOME, 'outputs', 'preprocessing', 'ManuscriptFinalData_80pct', "neuronsDay8_new", 'QC_figures')


print(os.environ['NOVA_HOME'])
import pandas as pd
import contextlib
import io
from IPython.display import display, Javascript

from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
                                                show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
                                                show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
                                                calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
                                                plot_cell_count, plot_catplot, plot_hm_of_mean_cell_count_per_tile, \
                                                run_calc_hist_new, show_total_valid_tiles_per_marker_and_batch
                                                
from tools.preprocessing_tools.qc_reports.qc_config import new_d8_panels, new_d8_markers, new_d8_marker_info, new_d8_cell_lines, new_d8_cell_lines_to_cond,\
                                    new_d8_cell_lines_for_disp, new_d8_reps, new_d8_line_colors, new_d8_lines_order, new_d8_custom_palette,\
                                    new_d8_expected_dapi_raw
%load_ext autoreload
%autoreload 2
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA
/home/projects/hornsteinlab/Collaboration/NOVA
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
In [ ]:
# choose batches
batches = ['batch1', 'batch2', 'batch3', 'batch7', 
'batch8', 'batch9']
batches
Out[ ]:
['batch1', 'batch2', 'batch3', 'batch7', 'batch8', 'batch9']
In [8]:
df = log_files_qc(LOGS_PATH, batches, filename_split='-',site_location=0)

df = df[df.condition != 'stress']

df_dapi = df[df.marker=='DAPI']
df_target = df[df.marker!='DAPI']
reading logs of batch8
reading logs of batch3
reading logs of batch9
reading logs of batch10
reading logs of batch2
reading logs of batch1
reading logs of batch7

Total of 15 files were read.
Before dup handeling  (1147717, 21)
After duplication removal #1: (1071227, 22)
After duplication removal #2: (1071227, 22)

Actual Files Validation¶

Raw Files Validation¶

  1. How many site tiff files do we have in each folder?
  2. Are all existing files valid? (tif, at least 2049kB, not corrupetd)
In [9]:
raws = run_validate_folder_structure(root_directory_raw, False, 
                                     new_d8_panels, 
                                     new_d8_markers,
                                     PLOT_PATH,
                                     new_d8_marker_info,
                                     new_d8_cell_lines_to_cond, 
                                     new_d8_reps, 
                                     new_d8_cell_lines_for_disp, 
                                     new_d8_expected_dapi_raw,
                                     batches=batches,
                                     expected_count=250,
                                     check_antibody=False)
batch1
Folder structure is invalid. Missing 11 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelA
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelB
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelC
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelD
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelE
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelF
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelG
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelH
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelI
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelJ
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelL
No bad files are found.
Total Sites:  140000
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           250   250   250          250          250   250   
G3BP1         rep2           250   250   250          250          250   250   
NONO          rep1           250   250   250          250          250   250   
NONO          rep2           250   250   250          250          250   250   
SQSTM1        rep1           250   250   250          250          250   250   
SQSTM1        rep2           250   250   250          250          250   250   
PSD95         rep1           250   250   250          250          250   250   
PSD95         rep2           250   250   250          250          250   250   
NEMO          rep1           250   250   250          250          250   250   
NEMO          rep2           250   250   250          250          250   250   
GM130         rep1           250   250   250          250          250   250   
GM130         rep2           250   250   250          250          250   250   
NCL           rep1           250   250   250          250          250   250   
NCL           rep2           250   250   250          250          250   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   250          250          250   250   
TDP43         rep1           250   250   250          250          250   250   
TDP43         rep2           250   250   250          250          250   250   
ANXA11        rep1           250   250   250          250          250   250   
ANXA11        rep2           250   250   250          250          250   250   
PEX14         rep1           250   250   250          250          250   250   
PEX14         rep2           250   250   250          250          250   250   
mitotracker   rep1           250   250   250          250          250   250   
mitotracker   rep2           250   250   250          250          250   250   
FMRP          rep1           250   250   250          250          250   250   
FMRP          rep2           250   250   250          250          250   250   
SON           rep1           250   250   250          250          250   250   
SON           rep2           250   250   250          250          250   250   
KIF5A         rep1           250   250   250          250          250   250   
KIF5A         rep2           250   250   250          250          250   250   
CLTC          rep1           250   250   250          250          250   250   
CLTC          rep2           250   250   250          250          250   250   
DCP1A         rep1           250   250   250          250          250   250   
DCP1A         rep2           250   250   250          250          250   250   
Calreticulin  rep1           250   250   250          250          250   250   
Calreticulin  rep2           250   250   250          250          250   250   
FUS           rep1           250   250   250          250          250   250   
FUS           rep2           250   250   250          250          250   250   
HNRNPA1       rep1           250   250   250          250          250   250   
HNRNPA1       rep2           250   250   250          250          250   250   
PML           rep1           250   250   250          250          250   250   
PML           rep2           250   250   250          250          250   250   
LAMP1         rep1           250   250   250          250          250   250   
LAMP1         rep2           250   250   250          250          250   250   
SNCA          rep1           250   250   250          250          250   250   
SNCA          rep2           250   250   250          250          250   250   
TIA1          rep1           250   250   250          250          250   250   
TIA1          rep2           250   250   250          250          250   250   
PURA          rep1           250   250   250          250          250   250   
PURA          rep2           250   250   250          250          250   250   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           250   250   250          250          250   250   
Tubulin       rep2           250   250   250          250          250   250   
Phalloidin    rep1           250   250   250          250          250   250   
Phalloidin    rep2           250   250   250          250          250   250   
TOMM20        rep1           250   250   250          250          250   250   
TOMM20        rep2           250   250   250          250          250   250   
DAPI          rep1          3000  3000  3000         3000         3000  3000   
DAPI          rep2          3000  3000  3000         3000         3000  3000   

             FUSHeterozygous SNCA  
Marker                             
G3BP1                    250  NaN  
G3BP1                    250  NaN  
NONO                     250  NaN  
NONO                     250  NaN  
SQSTM1                   250  NaN  
SQSTM1                   250  NaN  
PSD95                    250  NaN  
PSD95                    250  NaN  
NEMO                     250  NaN  
NEMO                     250  NaN  
GM130                    250  NaN  
GM130                    250  NaN  
NCL                      250  NaN  
NCL                      250  NaN  
LSM14A                   250  NaN  
LSM14A                   250  NaN  
TDP43                    250  NaN  
TDP43                    250  NaN  
ANXA11                   250  NaN  
ANXA11                   250  NaN  
PEX14                    250    0  
PEX14                    250    0  
mitotracker              250  NaN  
mitotracker              250  NaN  
FMRP                     250  NaN  
FMRP                     250  NaN  
SON                      250  NaN  
SON                      250  NaN  
KIF5A                    250  NaN  
KIF5A                    250  NaN  
CLTC                     250  NaN  
CLTC                     250  NaN  
DCP1A                    250  NaN  
DCP1A                    250  NaN  
Calreticulin             250  NaN  
Calreticulin             250  NaN  
FUS                      250  NaN  
FUS                      250  NaN  
HNRNPA1                  250  NaN  
HNRNPA1                  250  NaN  
PML                      250  NaN  
PML                      250  NaN  
LAMP1                    250  NaN  
LAMP1                    250  NaN  
SNCA                     250    0  
SNCA                     250    0  
TIA1                     250  NaN  
TIA1                     250  NaN  
PURA                     250  NaN  
PURA                     250  NaN  
CD41                     NaN  NaN  
CD41                     NaN  NaN  
Tubulin                  250  NaN  
Tubulin                  250  NaN  
Phalloidin               250  NaN  
Phalloidin               250  NaN  
TOMM20                   250  NaN  
TOMM20                   250  NaN  
DAPI                    3000    0  
DAPI                    3000    0  
========
batch2
Folder structure is invalid. Missing 2 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch2/WT/panelA/Untreated/rep2
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch2/SNCA
No bad files are found.
Total Sites:  139000
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           250   250   250          250          250   250   
G3BP1         rep2           250   250   250          NaN          250   250   
NONO          rep1           250   250   250          250          250   250   
NONO          rep2           250   250   250          250          250   250   
SQSTM1        rep1           250   250   250          250          250   250   
SQSTM1        rep2           250   250   250          250          250   250   
PSD95         rep1           250   250   250          250          250   250   
PSD95         rep2           250   250   250          250          250   250   
NEMO          rep1           250   250   250          250          250   250   
NEMO          rep2           250   250   250          250          250   250   
GM130         rep1           250   250   250          250          250   250   
GM130         rep2           250   250   250          250          250   250   
NCL           rep1           250   250   250          250          250   250   
NCL           rep2           250   250   250          250          250   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   250          250          250   250   
TDP43         rep1           250   250   250          250          250   250   
TDP43         rep2           250   250   250          250          250   250   
ANXA11        rep1           250   250   250          250          250   250   
ANXA11        rep2           250   250   250          250          250   250   
PEX14         rep1           250   250   250          250          250   250   
PEX14         rep2           250   250   250          250          250   250   
mitotracker   rep1           250   250   250          250          250   250   
mitotracker   rep2           250   250   250          250          250   250   
FMRP          rep1           250   250   250          250          250   250   
FMRP          rep2           250   250   250          NaN          250   250   
SON           rep1           250   250   250          250          250   250   
SON           rep2           250   250   250          250          250   250   
KIF5A         rep1           250   250   250          250          250   250   
KIF5A         rep2           250   250   250          250          250   250   
CLTC          rep1           250   250   250          250          250   250   
CLTC          rep2           250   250   250          250          250   250   
DCP1A         rep1           250   250   250          250          250   250   
DCP1A         rep2           250   250   250          250          250   250   
Calreticulin  rep1           250   250   250          250          250   250   
Calreticulin  rep2           250   250   250          250          250   250   
FUS           rep1           250   250   250          250          250   250   
FUS           rep2           250   250   250          250          250   250   
HNRNPA1       rep1           250   250   250          250          250   250   
HNRNPA1       rep2           250   250   250          250          250   250   
PML           rep1           250   250   250          250          250   250   
PML           rep2           250   250   250          250          250   250   
LAMP1         rep1           250   250   250          250          250   250   
LAMP1         rep2           250   250   250          250          250   250   
SNCA          rep1           250   250   250          250          250   250   
SNCA          rep2           250   250   250          250          250   250   
TIA1          rep1           250   250   250          250          250   250   
TIA1          rep2           250   250   250          250          250   250   
PURA          rep1           250   250   250          250          250   250   
PURA          rep2           250   250   250          NaN          250   250   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           250   250   250          250          250   250   
Tubulin       rep2           250   250   250          250          250   250   
Phalloidin    rep1           250   250   250          250          250   250   
Phalloidin    rep2           250   250   250          250          250   250   
TOMM20        rep1           250   250   250          250          250   250   
TOMM20        rep2           250   250   250          250          250   250   
DAPI          rep1          3000  3000  3000         3000         3000  3000   
DAPI          rep2          3000  3000  3000         2750         3000  3000   

             FUSHeterozygous SNCA  
Marker                             
G3BP1                    250  NaN  
G3BP1                    250  NaN  
NONO                     250  NaN  
NONO                     250  NaN  
SQSTM1                   250  NaN  
SQSTM1                   250  NaN  
PSD95                    250  NaN  
PSD95                    250  NaN  
NEMO                     250  NaN  
NEMO                     250  NaN  
GM130                    250  NaN  
GM130                    250  NaN  
NCL                      250  NaN  
NCL                      250  NaN  
LSM14A                   250  NaN  
LSM14A                   250  NaN  
TDP43                    250  NaN  
TDP43                    250  NaN  
ANXA11                   250  NaN  
ANXA11                   250  NaN  
PEX14                    250  NaN  
PEX14                    250  NaN  
mitotracker              250  NaN  
mitotracker              250  NaN  
FMRP                     250  NaN  
FMRP                     250  NaN  
SON                      250  NaN  
SON                      250  NaN  
KIF5A                    250  NaN  
KIF5A                    250  NaN  
CLTC                     250  NaN  
CLTC                     250  NaN  
DCP1A                    250  NaN  
DCP1A                    250  NaN  
Calreticulin             250  NaN  
Calreticulin             250  NaN  
FUS                      250  NaN  
FUS                      250  NaN  
HNRNPA1                  250  NaN  
HNRNPA1                  250  NaN  
PML                      250  NaN  
PML                      250  NaN  
LAMP1                    250  NaN  
LAMP1                    250  NaN  
SNCA                     250  NaN  
SNCA                     250  NaN  
TIA1                     250  NaN  
TIA1                     250  NaN  
PURA                     250  NaN  
PURA                     250  NaN  
CD41                     NaN  NaN  
CD41                     NaN  NaN  
Tubulin                  250  NaN  
Tubulin                  250  NaN  
Phalloidin               250  NaN  
Phalloidin               250  NaN  
TOMM20                   250  NaN  
TOMM20                   250  NaN  
DAPI                    3000  NaN  
DAPI                    3000  NaN  
========
batch3
Folder structure is invalid. Missing 1 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch3/SNCA
No bad files are found.
Total Sites:  139997
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           250   250   250          250          250   250   
G3BP1         rep2           250   250   250          250          250   250   
NONO          rep1           250   250   250          250          250   250   
NONO          rep2           250   250   249          250          250   250   
SQSTM1        rep1           250   250   250          250          250   250   
SQSTM1        rep2           250   250   250          250          250   250   
PSD95         rep1           250   250   250          250          250   250   
PSD95         rep2           250   250   250          250          250   250   
NEMO          rep1           250   250   250          250          250   250   
NEMO          rep2           250   250   250          250          250   250   
GM130         rep1           250   250   250          250          250   250   
GM130         rep2           250   250   250          250          250   250   
NCL           rep1           250   250   250          250          250   250   
NCL           rep2           250   250   250          250          250   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   250          250          250   250   
TDP43         rep1           250   250   250          250          250   250   
TDP43         rep2           250   250   250          250          250   250   
ANXA11        rep1           250   250   250          250          250   250   
ANXA11        rep2           250   250   250          250          250   250   
PEX14         rep1           250   250   250          250          250   250   
PEX14         rep2           250   250   250          250          250   250   
mitotracker   rep1           250   250   250          250          250   250   
mitotracker   rep2           250   250   250          250          250   250   
FMRP          rep1           250   250   250          250          250   250   
FMRP          rep2           250   250   250          250          250   250   
SON           rep1           250   250   250          250          250   250   
SON           rep2           250   250   249          250          250   250   
KIF5A         rep1           250   250   250          250          250   250   
KIF5A         rep2           250   250   250          250          250   250   
CLTC          rep1           250   250   250          250          250   250   
CLTC          rep2           250   250   250          250          250   250   
DCP1A         rep1           250   250   250          250          250   250   
DCP1A         rep2           250   250   250          250          250   250   
Calreticulin  rep1           250   250   250          250          250   250   
Calreticulin  rep2           250   250   250          250          250   250   
FUS           rep1           250   250   250          250          250   250   
FUS           rep2           250   250   250          250          250   250   
HNRNPA1       rep1           250   250   250          250          250   250   
HNRNPA1       rep2           250   250   250          250          250   250   
PML           rep1           250   250   250          250          250   250   
PML           rep2           250   250   250          250          250   250   
LAMP1         rep1           250   250   250          250          250   250   
LAMP1         rep2           250   250   250          250          250   250   
SNCA          rep1           250   250   250          250          250   250   
SNCA          rep2           250   250   250          250          250   250   
TIA1          rep1           250   250   250          250          250   250   
TIA1          rep2           250   250   250          250          250   250   
PURA          rep1           250   250   250          250          250   250   
PURA          rep2           250   250   250          250          250   250   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           250   250   250          250          250   250   
Tubulin       rep2           250   250   250          250          250   250   
Phalloidin    rep1           250   250   250          250          250   250   
Phalloidin    rep2           250   250   250          250          250   250   
TOMM20        rep1           250   250   250          250          250   250   
TOMM20        rep2           250   250   250          250          250   250   
DAPI          rep1          3000  3000  3000         3000         3000  3000   
DAPI          rep2          3000  3000  2999         3000         3000  3000   

             FUSHeterozygous SNCA  
Marker                             
G3BP1                    250  NaN  
G3BP1                    250  NaN  
NONO                     250  NaN  
NONO                     250  NaN  
SQSTM1                   250  NaN  
SQSTM1                   250  NaN  
PSD95                    250  NaN  
PSD95                    250  NaN  
NEMO                     250  NaN  
NEMO                     250  NaN  
GM130                    250  NaN  
GM130                    250  NaN  
NCL                      250  NaN  
NCL                      250  NaN  
LSM14A                   250  NaN  
LSM14A                   250  NaN  
TDP43                    250  NaN  
TDP43                    250  NaN  
ANXA11                   250  NaN  
ANXA11                   250  NaN  
PEX14                    250  NaN  
PEX14                    250  NaN  
mitotracker              250  NaN  
mitotracker              250  NaN  
FMRP                     250  NaN  
FMRP                     250  NaN  
SON                      250  NaN  
SON                      250  NaN  
KIF5A                    250  NaN  
KIF5A                    250  NaN  
CLTC                     250  NaN  
CLTC                     250  NaN  
DCP1A                    250  NaN  
DCP1A                    250  NaN  
Calreticulin             250  NaN  
Calreticulin             250  NaN  
FUS                      250  NaN  
FUS                      250  NaN  
HNRNPA1                  250  NaN  
HNRNPA1                  250  NaN  
PML                      250  NaN  
PML                      250  NaN  
LAMP1                    250  NaN  
LAMP1                    250  NaN  
SNCA                     250  NaN  
SNCA                     250  NaN  
TIA1                     250  NaN  
TIA1                     250  NaN  
PURA                     250  NaN  
PURA                     250  NaN  
CD41                     NaN  NaN  
CD41                     NaN  NaN  
Tubulin                  250  NaN  
Tubulin                  250  NaN  
Phalloidin               250  NaN  
Phalloidin               250  NaN  
TOMM20                   250  NaN  
TOMM20                   250  NaN  
DAPI                    3000  NaN  
DAPI                    3000  NaN  
========
batch7
Folder structure is valid.
No bad files are found.
Total Sites:  160000
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           250   250   250          250          250   250   
G3BP1         rep2           250   250   250          250          250   250   
NONO          rep1           250   250   250          250          250   250   
NONO          rep2           250   250   250          250          250   250   
SQSTM1        rep1           250   250   250          250          250   250   
SQSTM1        rep2           250   250   250          250          250   250   
PSD95         rep1           250   250   250          250          250   250   
PSD95         rep2           250   250   250          250          250   250   
NEMO          rep1           250   250   250          250          250   250   
NEMO          rep2           250   250   250          250          250   250   
GM130         rep1           250   250   250          250          250   250   
GM130         rep2           250   250   250          250          250   250   
NCL           rep1           250   250   250          250          250   250   
NCL           rep2           250   250   250          250          250   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   250          250          250   250   
TDP43         rep1           250   250   250          250          250   250   
TDP43         rep2           250   250   250          250          250   250   
ANXA11        rep1           250   250   250          250          250   250   
ANXA11        rep2           250   250   250          250          250   250   
PEX14         rep1           250   250   250          250          250   250   
PEX14         rep2           250   250   250          250          250   250   
mitotracker   rep1           250   250   250          250          250   250   
mitotracker   rep2           250   250   250          250          250   250   
FMRP          rep1           250   250   250          250          250   250   
FMRP          rep2           250   250   250          250          250   250   
SON           rep1           250   250   250          250          250   250   
SON           rep2           250   250   250          250          250   250   
KIF5A         rep1           250   250   250          250          250   250   
KIF5A         rep2           250   250   250          250          250   250   
CLTC          rep1           250   250   250          250          250   250   
CLTC          rep2           250   250   250          250          250   250   
DCP1A         rep1           250   250   250          250          250   250   
DCP1A         rep2           250   250   250          250          250   250   
Calreticulin  rep1           250   250   250          250          250   250   
Calreticulin  rep2           250   250   250          250          250   250   
FUS           rep1           250   250   250          250          250   250   
FUS           rep2           250   250   250          250          250   250   
HNRNPA1       rep1           250   250   250          250          250   250   
HNRNPA1       rep2           250   250   250          250          250   250   
PML           rep1           250   250   250          250          250   250   
PML           rep2           250   250   250          250          250   250   
LAMP1         rep1           250   250   250          250          250   250   
LAMP1         rep2           250   250   250          250          250   250   
SNCA          rep1           250   250   250          250          250   250   
SNCA          rep2           250   250   250          250          250   250   
TIA1          rep1           250   250   250          250          250   250   
TIA1          rep2           250   250   250          250          250   250   
PURA          rep1           250   250   250          250          250   250   
PURA          rep2           250   250   250          250          250   250   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           250   250   250          250          250   250   
Tubulin       rep2           250   250   250          250          250   250   
Phalloidin    rep1           250   250   250          250          250   250   
Phalloidin    rep2           250   250   250          250          250   250   
TOMM20        rep1           250   250   250          250          250   250   
TOMM20        rep2           250   250   250          250          250   250   
DAPI          rep1          3000  3000  3000         3000         3000  3000   
DAPI          rep2          3000  3000  3000         3000         3000  3000   

             FUSHeterozygous  SNCA  
Marker                              
G3BP1                    250   250  
G3BP1                    250   250  
NONO                     250   250  
NONO                     250   250  
SQSTM1                   250   250  
SQSTM1                   250   250  
PSD95                    250   250  
PSD95                    250   250  
NEMO                     250   250  
NEMO                     250   250  
GM130                    250   250  
GM130                    250   250  
NCL                      250   250  
NCL                      250   250  
LSM14A                   250   250  
LSM14A                   250   250  
TDP43                    250   250  
TDP43                    250   250  
ANXA11                   250   250  
ANXA11                   250   250  
PEX14                    250   250  
PEX14                    250   250  
mitotracker              250   250  
mitotracker              250   250  
FMRP                     250   250  
FMRP                     250   250  
SON                      250   250  
SON                      250   250  
KIF5A                    250   250  
KIF5A                    250   250  
CLTC                     250   250  
CLTC                     250   250  
DCP1A                    250   250  
DCP1A                    250   250  
Calreticulin             250   250  
Calreticulin             250   250  
FUS                      250   250  
FUS                      250   250  
HNRNPA1                  250   250  
HNRNPA1                  250   250  
PML                      250   250  
PML                      250   250  
LAMP1                    250   250  
LAMP1                    250   250  
SNCA                     250   250  
SNCA                     250   250  
TIA1                     250   250  
TIA1                     250   250  
PURA                     250   250  
PURA                     250   250  
CD41                     NaN   NaN  
CD41                     NaN   NaN  
Tubulin                  250   250  
Tubulin                  250   250  
Phalloidin               250   250  
Phalloidin               250   250  
TOMM20                   250   250  
TOMM20                   250   250  
DAPI                    3000  3000  
DAPI                    3000  3000  
========
batch8
Folder structure is valid.
No bad files are found.
Total Sites:  160000
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           250   250   250          250          250   250   
G3BP1         rep2           250   250   250          250          250   250   
NONO          rep1           250   250   250          250          250   250   
NONO          rep2           250   250   250          250          250   250   
SQSTM1        rep1           250   250   250          250          250   250   
SQSTM1        rep2           250   250   250          250          250   250   
PSD95         rep1           250   250   250          250          250   250   
PSD95         rep2           250   250   250          250          250   250   
NEMO          rep1           250   250   250          250          250   250   
NEMO          rep2           250   250   250          250          250   250   
GM130         rep1           250   250   250          250          250   250   
GM130         rep2           250   250   250          250          250   250   
NCL           rep1           250   250   250          250          250   250   
NCL           rep2           250   250   250          250          250   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   250          250          250   250   
TDP43         rep1           250   250   250          250          250   250   
TDP43         rep2           250   250   250          250          250   250   
ANXA11        rep1           250   250   250          250          250   250   
ANXA11        rep2           250   250   250          250          250   250   
PEX14         rep1           250   250   250          250          250   250   
PEX14         rep2           250   250   250          250          250   250   
mitotracker   rep1           250   250   250          250          250   250   
mitotracker   rep2           250   250   250          250          250   250   
FMRP          rep1           250   250   250          250          250   250   
FMRP          rep2           250   250   250          250          250   250   
SON           rep1           250   250   250          250          250   250   
SON           rep2           250   250   250          250          250   250   
KIF5A         rep1           250   250   250          250          250   250   
KIF5A         rep2           250   250   250          250          250   250   
CLTC          rep1           250   250   250          250          250   250   
CLTC          rep2           250   250   250          250          250   250   
DCP1A         rep1           250   250   250          250          250   250   
DCP1A         rep2           250   250   250          250          250   250   
Calreticulin  rep1           250   250   250          250          250   250   
Calreticulin  rep2           250   250   250          250          250   250   
FUS           rep1           250   250   250          250          250   250   
FUS           rep2           250   250   250          250          250   250   
HNRNPA1       rep1           250   250   250          250          250   250   
HNRNPA1       rep2           250   250   250          250          250   250   
PML           rep1           250   250   250          250          250   250   
PML           rep2           250   250   250          250          250   250   
LAMP1         rep1           250   250   250          250          250   250   
LAMP1         rep2           250   250   250          250          250   250   
SNCA          rep1           250   250   250          250          250   250   
SNCA          rep2           250   250   250          250          250   250   
TIA1          rep1           250   250   250          250          250   250   
TIA1          rep2           250   250   250          250          250   250   
PURA          rep1           250   250   250          250          250   250   
PURA          rep2           250   250   250          250          250   250   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           250   250   250          250          250   250   
Tubulin       rep2           250   250   250          250          250   250   
Phalloidin    rep1           250   250   250          250          250   250   
Phalloidin    rep2           250   250   250          250          250   250   
TOMM20        rep1           250   250   250          250          250   250   
TOMM20        rep2           250   250   250          250          250   250   
DAPI          rep1          3000  3000  3000         3000         3000  3000   
DAPI          rep2          3000  3000  3000         3000         3000  3000   

             FUSHeterozygous  SNCA  
Marker                              
G3BP1                    250   250  
G3BP1                    250   250  
NONO                     250   250  
NONO                     250   250  
SQSTM1                   250   250  
SQSTM1                   250   250  
PSD95                    250   250  
PSD95                    250   250  
NEMO                     250   250  
NEMO                     250   250  
GM130                    250   250  
GM130                    250   250  
NCL                      250   250  
NCL                      250   250  
LSM14A                   250   250  
LSM14A                   250   250  
TDP43                    250   250  
TDP43                    250   250  
ANXA11                   250   250  
ANXA11                   250   250  
PEX14                    250   250  
PEX14                    250   250  
mitotracker              250   250  
mitotracker              250   250  
FMRP                     250   250  
FMRP                     250   250  
SON                      250   250  
SON                      250   250  
KIF5A                    250   250  
KIF5A                    250   250  
CLTC                     250   250  
CLTC                     250   250  
DCP1A                    250   250  
DCP1A                    250   250  
Calreticulin             250   250  
Calreticulin             250   250  
FUS                      250   250  
FUS                      250   250  
HNRNPA1                  250   250  
HNRNPA1                  250   250  
PML                      250   250  
PML                      250   250  
LAMP1                    250   250  
LAMP1                    250   250  
SNCA                     250   250  
SNCA                     250   250  
TIA1                     250   250  
TIA1                     250   250  
PURA                     250   250  
PURA                     250   250  
CD41                     NaN   NaN  
CD41                     NaN   NaN  
Tubulin                  250   250  
Tubulin                  250   250  
Phalloidin               250   250  
Phalloidin               250   250  
TOMM20                   250   250  
TOMM20                   250   250  
DAPI                    3000  3000  
DAPI                    3000  3000  
========
batch9
Folder structure is valid.
No bad files are found.
Total Sites:  159996
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           250   250   250          250          250   250   
G3BP1         rep2           250   250   250          250          250   250   
NONO          rep1           250   250   250          250          250   250   
NONO          rep2           250   250   250          250          250   250   
SQSTM1        rep1           250   250   250          250          250   250   
SQSTM1        rep2           250   250   250          250          250   250   
PSD95         rep1           250   250   250          250          250   250   
PSD95         rep2           250   250   250          250          250   250   
NEMO          rep1           250   250   250          250          250   250   
NEMO          rep2           250   250   250          250          250   250   
GM130         rep1           250   250   250          250          250   250   
GM130         rep2           250   250   250          250          250   250   
NCL           rep1           250   250   250          250          250   250   
NCL           rep2           250   250   250          250          250   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   250          250          250   250   
TDP43         rep1           250   250   250          250          250   250   
TDP43         rep2           250   250   250          250          250   250   
ANXA11        rep1           250   250   250          250          250   250   
ANXA11        rep2           250   250   250          250          250   250   
PEX14         rep1           250   250   250          250          250   250   
PEX14         rep2           250   250   250          250          250   250   
mitotracker   rep1           250   250   250          250          250   250   
mitotracker   rep2           250   250   250          250          250   250   
FMRP          rep1           250   250   250          250          250   250   
FMRP          rep2           250   250   250          250          250   250   
SON           rep1           250   250   250          250          250   250   
SON           rep2           250   250   250          250          250   250   
KIF5A         rep1           250   250   250          250          250   250   
KIF5A         rep2           250   250   250          250          250   250   
CLTC          rep1           250   250   250          250          250   250   
CLTC          rep2           250   250   250          250          250   250   
DCP1A         rep1           250   250   250          250          250   250   
DCP1A         rep2           250   250   250          250          250   250   
Calreticulin  rep1           250   250   250          250          250   250   
Calreticulin  rep2           250   250   250          250          250   250   
FUS           rep1           250   250   250          250          250   250   
FUS           rep2           250   250   250          250          250   250   
HNRNPA1       rep1           250   250   250          250          250   250   
HNRNPA1       rep2           250   250   250          250          250   250   
PML           rep1           250   250   250          250          250   250   
PML           rep2           250   250   250          250          250   250   
LAMP1         rep1           250   250   250          250          250   250   
LAMP1         rep2           250   250   250          250          250   250   
SNCA          rep1           250   250   250          250          250   250   
SNCA          rep2           250   250   250          250          250   250   
TIA1          rep1           250   250   250          250          250   250   
TIA1          rep2           250   250   250          250          250   250   
PURA          rep1           250   250   250          250          250   250   
PURA          rep2           250   250   250          250          250   250   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           250   250   250          250          250   250   
Tubulin       rep2           250   250   250          250          250   250   
Phalloidin    rep1           250   250   250          250          250   250   
Phalloidin    rep2           250   250   250          250          250   250   
TOMM20        rep1           250   250   250          250          250   250   
TOMM20        rep2           250   250   250          250          250   250   
DAPI          rep1          3000  3000  3000         3000         3000  3000   
DAPI          rep2          3000  3000  3000         3000         3000  3000   

             FUSHeterozygous  SNCA  
Marker                              
G3BP1                    250   250  
G3BP1                    250   250  
NONO                     250   250  
NONO                     250   250  
SQSTM1                   250   249  
SQSTM1                   250   250  
PSD95                    250   250  
PSD95                    250   250  
NEMO                     250   250  
NEMO                     250   250  
GM130                    250   250  
GM130                    250   250  
NCL                      250   250  
NCL                      250   250  
LSM14A                   250   250  
LSM14A                   250   250  
TDP43                    250   250  
TDP43                    250   250  
ANXA11                   250   250  
ANXA11                   250   250  
PEX14                    250   250  
PEX14                    250   250  
mitotracker              250   250  
mitotracker              250   250  
FMRP                     250   250  
FMRP                     250   250  
SON                      250   250  
SON                      250   250  
KIF5A                    250   249  
KIF5A                    250   250  
CLTC                     250   250  
CLTC                     250   250  
DCP1A                    250   250  
DCP1A                    250   250  
Calreticulin             250   250  
Calreticulin             250   250  
FUS                      250   250  
FUS                      250   250  
HNRNPA1                  250   250  
HNRNPA1                  250   250  
PML                      250   250  
PML                      250   250  
LAMP1                    250   250  
LAMP1                    250   250  
SNCA                     250   250  
SNCA                     250   250  
TIA1                     250   250  
TIA1                     250   250  
PURA                     250   250  
PURA                     250   250  
CD41                     NaN   NaN  
CD41                     NaN   NaN  
Tubulin                  250   249  
Tubulin                  250   250  
Phalloidin               250   250  
Phalloidin               250   250  
TOMM20                   250   250  
TOMM20                   250   250  
DAPI                    3000  2999  
DAPI                    3000  3000  
========
====================

Processed Files Validation¶

  1. How many site npy files do we have in each folder? -> How many sites survived the pre-processing?
  2. Are all existing files valid? (at least 100kB, npy not corrupted)
In [10]:
procs = run_validate_folder_structure(root_directory_proc, True, 
                                      new_d8_panels, 
                                      new_d8_markers,
                                      PLOT_PATH,
                                      new_d8_marker_info,
                                      new_d8_cell_lines_to_cond, 
                                      new_d8_reps, 
                                      new_d8_cell_lines_for_disp, 
                                      new_d8_expected_dapi_raw,
                                      batches=batches,
                                      expected_count=250, 
                                      check_antibody=False)
batch1
Folder structure is invalid. Missing 8 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/FUSHomozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/TDP43/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/TBK1/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/WT/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/FUSRevertant/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/OPTN/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/FUSHeterozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/SNCA
No bad files are found.
Total Sites:  123380
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           200   188   205          223          245   238   
G3BP1         rep2           222   205   183          179          231   229   
NONO          rep1           191   205   223          201          215   226   
NONO          rep2           214   206   208          200          241   215   
SQSTM1        rep1           159   181   177          198          208   191   
SQSTM1        rep2           124   175   159          195          185   192   
PSD95         rep1           202   247   248          246          213   237   
PSD95         rep2           246   243   248          249          227   243   
NEMO          rep1           213   246   244          204          247   244   
NEMO          rep2           248   244   247          245          192   244   
GM130         rep1           248   250   250          249          247   250   
GM130         rep2           247   250   250          250          248   250   
NCL           rep1           241   250   249          250          248   249   
NCL           rep2           250   250   249          250          247   250   
LSM14A        rep1           250   248   250          250          171   250   
LSM14A        rep2           236   249   250          240          223   250   
TDP43         rep1           226   234   195          241          229   220   
TDP43         rep2           188   225   189          235          220   222   
ANXA11        rep1           189   236   226          211          182   201   
ANXA11        rep2           188   249   212          236          245   214   
PEX14         rep1           147   247   178          231          182   218   
PEX14         rep2           198   244   213          246          185   202   
mitotracker   rep1           148   226   220          219          144   234   
mitotracker   rep2           227   242   234          230          196   245   
FMRP          rep1           186   152   194          208          242   233   
FMRP          rep2           201   167   193          181          227   212   
SON           rep1           205   244   238          211          216   226   
SON           rep2           224   225   226          218          239   217   
KIF5A         rep1           199   207   203          202          216   194   
KIF5A         rep2           156   211   179          216          199   203   
CLTC          rep1           230   249   250          246          212   238   
CLTC          rep2           249   250   249          250          228   249   
DCP1A         rep1           214   250   247          203          242   248   
DCP1A         rep2           250   250   250          249          185   249   
Calreticulin  rep1           248   249   247          249          249   250   
Calreticulin  rep2           246   248   249          250          248   250   
FUS           rep1           240   250   250          250          249   249   
FUS           rep2           250   250   249          250          247   250   
HNRNPA1       rep1           240   234   240          247          164   250   
HNRNPA1       rep2           234   246   246          238          209   249   
PML           rep1           230   248   240          246          221   244   
PML           rep2           216   248   249          246          233   245   
LAMP1         rep1            81    77    77           87           60   113   
LAMP1         rep2            57    76    81           66           64   102   
SNCA          rep1            95   143   122          107          134   118   
SNCA          rep2           131   104   118          136          111   116   
TIA1          rep1           227   225   233          198          227   238   
TIA1          rep2           200   228   218          207          205   227   
PURA          rep1           155   170   203          187          208   198   
PURA          rep2           189   175   193          184          207   190   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           145   204   184          197          179   187   
Tubulin       rep2           103   206   160          213          183   203   
Phalloidin    rep1           209   233   233          230          188   225   
Phalloidin    rep2           203   238   221          238          209   239   
TOMM20        rep1           203   242   238          227          234   235   
TOMM20        rep2           220   232   239          228          211   233   
DAPI          rep1          2638  2961  2911         2847         2684  2868   
DAPI          rep2          2721  2939  2882         2916         2754  2871   

             FUSHeterozygous SNCA  
Marker                             
G3BP1                    249  NaN  
G3BP1                    247  NaN  
NONO                     241  NaN  
NONO                     244  NaN  
SQSTM1                   162  NaN  
SQSTM1                   164  NaN  
PSD95                    243  NaN  
PSD95                    246  NaN  
NEMO                     184  NaN  
NEMO                     228  NaN  
GM130                    249  NaN  
GM130                    250  NaN  
NCL                      248  NaN  
NCL                      250  NaN  
LSM14A                   233  NaN  
LSM14A                   243  NaN  
TDP43                    212  NaN  
TDP43                    225  NaN  
ANXA11                   242  NaN  
ANXA11                   238  NaN  
PEX14                    213  NaN  
PEX14                    222  NaN  
mitotracker              219  NaN  
mitotracker              198  NaN  
FMRP                     248  NaN  
FMRP                     243  NaN  
SON                      240  NaN  
SON                      248  NaN  
KIF5A                    170  NaN  
KIF5A                    206  NaN  
CLTC                     247  NaN  
CLTC                     246  NaN  
DCP1A                    178  NaN  
DCP1A                    227  NaN  
Calreticulin             248  NaN  
Calreticulin             248  NaN  
FUS                      248  NaN  
FUS                      250  NaN  
HNRNPA1                  229  NaN  
HNRNPA1                  246  NaN  
PML                      217  NaN  
PML                      220  NaN  
LAMP1                     66  NaN  
LAMP1                     82  NaN  
SNCA                     146  NaN  
SNCA                     133  NaN  
TIA1                     234  NaN  
TIA1                     218  NaN  
PURA                     216  NaN  
PURA                     208  NaN  
CD41                     NaN  NaN  
CD41                     NaN  NaN  
Tubulin                  132  NaN  
Tubulin                  147  NaN  
Phalloidin               170  NaN  
Phalloidin               188  NaN  
TOMM20                   236  NaN  
TOMM20                   210  NaN  
DAPI                    2801  NaN  
DAPI                    2903  NaN  
========
batch2
Folder structure is invalid. Missing 8 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/FUSHomozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/TDP43/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/TBK1/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/WT/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/FUSRevertant/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/OPTN/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/FUSHeterozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/SNCA
No bad files are found.
Total Sites:  123753
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           240   245   240          241          216   242   
G3BP1         rep2           230   245   231            0          194   240   
NONO          rep1           249   246   240          250          235   247   
NONO          rep2           249   208   250          249          244   248   
SQSTM1        rep1           168   159   215          208          204   216   
SQSTM1        rep2           218   165   224          211          188   228   
PSD95         rep1           238   246   250          236          229   247   
PSD95         rep2           249   248   249          248          226   250   
NEMO          rep1           244   247   248          242          141   245   
NEMO          rep2           238   173   244          241          200   243   
GM130         rep1           245   244   248          221          192   197   
GM130         rep2           248   237   249          184          214   250   
NCL           rep1           241   249   246          250          223   249   
NCL           rep2           240   178   213          199          235   249   
LSM14A        rep1           248   250   232          250          242   250   
LSM14A        rep2           243   246   248          248          231   250   
TDP43         rep1           238   231   236          242          210   244   
TDP43         rep2           233   234   234          241          214   230   
ANXA11        rep1           227   237   229          241          233   230   
ANXA11        rep2           204   215   247          240          188   238   
PEX14         rep1           236   238   248          239          246   248   
PEX14         rep2           233   236   245          247           32   243   
mitotracker   rep1           210   242   239          232          210   248   
mitotracker   rep2            88   242   237          249          188   247   
FMRP          rep1           238   235   232          238          209   238   
FMRP          rep2           228   243   225            0          192   227   
SON           rep1           249   247   240          250          238   250   
SON           rep2           249   206   250          250          245   249   
KIF5A         rep1           183   177   229          231          226   231   
KIF5A         rep2           236   183   232          224          209   236   
CLTC          rep1           237   247   250          236          229   248   
CLTC          rep2           249   249   250          250          227   250   
DCP1A         rep1           250   250   249          245          141   250   
DCP1A         rep2           248   172   248          250          202   250   
Calreticulin  rep1           250   244   249          215          201   196   
Calreticulin  rep2           247   235   248          180          240   250   
FUS           rep1           247   244   247          248          217   249   
FUS           rep2           249   170   210          196          238   249   
HNRNPA1       rep1           244   249   228          249          235   250   
HNRNPA1       rep2           241   245   246          247          225   249   
PML           rep1           239   240   245          242          186   228   
PML           rep2           216   242   247          246          241   248   
LAMP1         rep1            96   124   138           95           66   130   
LAMP1         rep2            83   130   148           92           81    80   
SNCA          rep1           124   116   114          152          138   140   
SNCA          rep2           152   142   100          165           72   104   
TIA1          rep1           162   224   224          210          190   221   
TIA1          rep2            84   220   218          224          171   214   
PURA          rep1           202   206   199          202          187   211   
PURA          rep2           193   202   184            0          157   185   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           181   175   240          225          216   227   
Tubulin       rep2           223   159   222          230          209   234   
Phalloidin    rep1           183   198   221          221          209   222   
Phalloidin    rep2           203   203   222          218          210   229   
TOMM20        rep1           193   219   222          211          194   235   
TOMM20        rep2            72   226   233          229          193   234   
DAPI          rep1          2862  2908  2940         2918         2662  2925   
DAPI          rep2          2761  2696  2928         2719         2580  2975   

             FUSHeterozygous SNCA  
Marker                             
G3BP1                    244  NaN  
G3BP1                    224  NaN  
NONO                     207  NaN  
NONO                     202  NaN  
SQSTM1                   199  NaN  
SQSTM1                   208  NaN  
PSD95                    250  NaN  
PSD95                    249  NaN  
NEMO                     130  NaN  
NEMO                     216  NaN  
GM130                    247  NaN  
GM130                    249  NaN  
NCL                      248  NaN  
NCL                      248  NaN  
LSM14A                   247  NaN  
LSM14A                   240  NaN  
TDP43                    232  NaN  
TDP43                    234  NaN  
ANXA11                   235  NaN  
ANXA11                   190  NaN  
PEX14                    234  NaN  
PEX14                    223  NaN  
mitotracker              224  NaN  
mitotracker              212  NaN  
FMRP                     243  NaN  
FMRP                     215  NaN  
SON                      207  NaN  
SON                      201  NaN  
KIF5A                    215  NaN  
KIF5A                    221  NaN  
CLTC                     250  NaN  
CLTC                     249  NaN  
DCP1A                    123  NaN  
DCP1A                    226  NaN  
Calreticulin             248  NaN  
Calreticulin             250  NaN  
FUS                      247  NaN  
FUS                      248  NaN  
HNRNPA1                  249  NaN  
HNRNPA1                  242  NaN  
PML                      225  NaN  
PML                      209  NaN  
LAMP1                     88  NaN  
LAMP1                     99  NaN  
SNCA                      98  NaN  
SNCA                     136  NaN  
TIA1                     173  NaN  
TIA1                     204  NaN  
PURA                     197  NaN  
PURA                     188  NaN  
CD41                     NaN  NaN  
CD41                     NaN  NaN  
Tubulin                  190  NaN  
Tubulin                  224  NaN  
Phalloidin               139  NaN  
Phalloidin               156  NaN  
TOMM20                   209  NaN  
TOMM20                   204  NaN  
DAPI                    2768  NaN  
DAPI                    2801  NaN  
========
batch3
Folder structure is invalid. Missing 8 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/FUSHomozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/TDP43/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/TBK1/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/WT/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/FUSRevertant/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/OPTN/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/FUSHeterozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/SNCA
No bad files are found.
Total Sites:  124029
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           247   247   238          246          222   242   
G3BP1         rep2           220   237   194          235          161   200   
NONO          rep1           187   225   249          250          167   193   
NONO          rep2           181   241   246          228          186   186   
SQSTM1        rep1           169   206   183          183          196   197   
SQSTM1        rep2           158   214   220          234          179   219   
PSD95         rep1           249   243   247          213          246   249   
PSD95         rep2           248   249   243          246          229   248   
NEMO          rep1           249   240   236          243          234   244   
NEMO          rep2           243   239   199          247          238   247   
GM130         rep1           248   197   246          250          243   193   
GM130         rep2           249   225   250          221          246   244   
NCL           rep1           249   250   227          217          248   250   
NCL           rep2           248   245   245          250          250   250   
LSM14A        rep1           250   250   244          250          241   250   
LSM14A        rep2           246   250   245          250          224   250   
TDP43         rep1           228   228   236          233          207   235   
TDP43         rep2           195   235   232          237          220   238   
ANXA11        rep1           236   227   235          227          221   240   
ANXA11        rep2           236   219   232          238          230   242   
PEX14         rep1           246   246   248          222          182   249   
PEX14         rep2           240   247   247          248          185   247   
mitotracker   rep1           235   247   249          247          218   248   
mitotracker   rep2           240   240   245          249          234   249   
FMRP          rep1           244   234   229          235          208   226   
FMRP          rep2           216   207   182          226          154   189   
SON           rep1           185   225   247          250          165   191   
SON           rep2           180   249   249          227          185   188   
KIF5A         rep1           174   196   212          189          201   205   
KIF5A         rep2           155   198   235          211          187   223   
CLTC          rep1           250   240   249          197          246   228   
CLTC          rep2           250   250   245          248          228   248   
DCP1A         rep1           246   246   232          247          237   248   
DCP1A         rep2           246   249   183          243          246   247   
Calreticulin  rep1           250   193   248          245          244   188   
Calreticulin  rep2           247   218   247          215          249   239   
FUS           rep1           250   250   222          214          247   249   
FUS           rep2           250   244   247          249          248   250   
HNRNPA1       rep1           249   250   238          248          234   250   
HNRNPA1       rep2           249   250   244          247          210   250   
PML           rep1           214   244   243          194          232   245   
PML           rep2           192   235   202          189          214   223   
LAMP1         rep1            68    57    53           64           83    59   
LAMP1         rep2            44    38   100           51           95    71   
SNCA          rep1           141   105   122          137          112   116   
SNCA          rep2           139   125   105          150          126    88   
TIA1          rep1           221   206   225          217          226   234   
TIA1          rep2           224   214   221          222          217   222   
PURA          rep1           181   201   173          205          190   202   
PURA          rep2           176   181   177          205          145   177   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           159   213   189          179          187   219   
Tubulin       rep2           135   220   229          238          190   210   
Phalloidin    rep1           222   230   225          230          205   188   
Phalloidin    rep2           207   236   235          237          215   238   
TOMM20        rep1           235   239   232          235          227   238   
TOMM20        rep2           236   231   230          241          233   225   
DAPI          rep1          2845  2898  2915         2871         2766  2866   
DAPI          rep2          2771  2931  2865         2917         2712  2886   

             FUSHeterozygous SNCA  
Marker                             
G3BP1                    217  NaN  
G3BP1                    204  NaN  
NONO                     181  NaN  
NONO                     185  NaN  
SQSTM1                   216  NaN  
SQSTM1                   163  NaN  
PSD95                    242  NaN  
PSD95                    246  NaN  
NEMO                     238  NaN  
NEMO                     238  NaN  
GM130                    232  NaN  
GM130                    241  NaN  
NCL                      226  NaN  
NCL                      247  NaN  
LSM14A                   225  NaN  
LSM14A                   221  NaN  
TDP43                    219  NaN  
TDP43                    207  NaN  
ANXA11                   230  NaN  
ANXA11                   178  NaN  
PEX14                    215  NaN  
PEX14                    237  NaN  
mitotracker              231  NaN  
mitotracker              231  NaN  
FMRP                     205  NaN  
FMRP                     194  NaN  
SON                      186  NaN  
SON                      185  NaN  
KIF5A                    228  NaN  
KIF5A                    176  NaN  
CLTC                     243  NaN  
CLTC                     248  NaN  
DCP1A                    228  NaN  
DCP1A                    238  NaN  
Calreticulin             242  NaN  
Calreticulin             245  NaN  
FUS                      238  NaN  
FUS                      247  NaN  
HNRNPA1                  219  NaN  
HNRNPA1                  222  NaN  
PML                      187  NaN  
PML                      186  NaN  
LAMP1                    100  NaN  
LAMP1                     86  NaN  
SNCA                     103  NaN  
SNCA                     118  NaN  
TIA1                     227  NaN  
TIA1                     211  NaN  
PURA                     181  NaN  
PURA                     163  NaN  
CD41                     NaN  NaN  
CD41                     NaN  NaN  
Tubulin                  223  NaN  
Tubulin                  157  NaN  
Phalloidin               210  NaN  
Phalloidin               195  NaN  
TOMM20                   229  NaN  
TOMM20                   230  NaN  
DAPI                    2790  NaN  
DAPI                    2691  NaN  
========
batch7
Folder structure is invalid. Missing 8 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/FUSHomozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/TDP43/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/TBK1/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/WT/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/FUSRevertant/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/OPTN/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/FUSHeterozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/SNCA/Untreated/CD41
No bad files are found.
Total Sites:  132800
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           184   214   211          219          184   220   
G3BP1         rep2           238   204   107          164          204   213   
NONO          rep1           218   237    11          124          142   231   
NONO          rep2           219   238   220          227          211   241   
SQSTM1        rep1           178   120   163          169          136   164   
SQSTM1        rep2           159   209   212          197          125   198   
PSD95         rep1           240   242   178          167          192   247   
PSD95         rep2           191   248   227          243          149   248   
NEMO          rep1           206   231   237          187          216   239   
NEMO          rep2           213   230   238          186          211   238   
GM130         rep1           249   250   250          250          250   250   
GM130         rep2           250   236   249          249          250   250   
NCL           rep1           233   250   249          249          249   249   
NCL           rep2           248   249   249          250          247   250   
LSM14A        rep1           243   248   248          247          249   248   
LSM14A        rep2           245   249   250          248          248   249   
TDP43         rep1           240   242   231          244          138   246   
TDP43         rep2           234   238   249          225          146   242   
ANXA11        rep1           246   155   171          238          206   230   
ANXA11        rep2           238   242   240          220          222   238   
PEX14         rep1           150   247   119          221          168   246   
PEX14         rep2           246   236   221          169          174   244   
mitotracker   rep1           248   156   244          242          241   246   
mitotracker   rep2           245   233   232          240          243   241   
FMRP          rep1           170   206   211          211          172   211   
FMRP          rep2           214   177    94          147          186   204   
SON           rep1           236   242    13          165          196   235   
SON           rep2           239   240   240          241          231   242   
KIF5A         rep1           208   136   182          193          156   188   
KIF5A         rep2           204   226   229          208          160   224   
CLTC          rep1           249   242   176          247          193   247   
CLTC          rep2           197   249   227          250          149   249   
DCP1A         rep1           243   248   245          245          241   250   
DCP1A         rep2           233   243   244          249          240   248   
Calreticulin  rep1           242   250   247          241          238   248   
Calreticulin  rep2           247   246   248          250          220   248   
FUS           rep1             4     3     1            2            5     0   
FUS           rep2             1     0     4            2            0     1   
HNRNPA1       rep1           246   247   249          229          232   246   
HNRNPA1       rep2           247   246   248          223          236   249   
PML           rep1           249   248   232          242          207   249   
PML           rep2           249   246   249          250          243   249   
LAMP1         rep1           139    99   101          124          157   143   
LAMP1         rep2           123   122   209           79          151   186   
SNCA          rep1            88   104    95           90           88    51   
SNCA          rep2           128   125   151          108          130   128   
TIA1          rep1           206   152   178          174          229   238   
TIA1          rep2           201   189   163          135          228   198   
PURA          rep1           168   196   194          203          185   198   
PURA          rep2           205   181   102          209          185   193   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           243   139   170          239          171   171   
Tubulin       rep2           240   226   234          239          168   212   
Phalloidin    rep1           237   226   174          227          190   243   
Phalloidin    rep2           180   237   220          222          143   243   
TOMM20        rep1           246   162   249          249          247   248   
TOMM20        rep2           245   245   236          246          241   249   
DAPI          rep1          2817  2695  2428         2880         2737  2909   
DAPI          rep2          2916  2941  2803         2909         2747  2955   

             FUSHeterozygous  SNCA  
Marker                              
G3BP1                    196    71  
G3BP1                    230   132  
NONO                     181   217  
NONO                     138   213  
SQSTM1                   166   171  
SQSTM1                   138   131  
PSD95                    175   209  
PSD95                     80   100  
NEMO                     158   232  
NEMO                     140   233  
GM130                    250   100  
GM130                    250    83  
NCL                      209   217  
NCL                      221   245  
LSM14A                   239   211  
LSM14A                   243   182  
TDP43                    202   220  
TDP43                    203   199  
ANXA11                   218   226  
ANXA11                   245   188  
PEX14                    148   163  
PEX14                    239   202  
mitotracker              246   192  
mitotracker              241   200  
FMRP                     176    57  
FMRP                     173   123  
SON                      225   222  
SON                      216   217  
KIF5A                    207   200  
KIF5A                    185   155  
CLTC                     206   234  
CLTC                     155    99  
DCP1A                    232   190  
DCP1A                    204   192  
Calreticulin             220   250  
Calreticulin             221   248  
FUS                        2     2  
FUS                        1     0  
HNRNPA1                  217   243  
HNRNPA1                  235   196  
PML                      241   226  
PML                      248   227  
LAMP1                     84    68  
LAMP1                    149    99  
SNCA                     112   100  
SNCA                     179   154  
TIA1                     199   208  
TIA1                     198   205  
PURA                     167    65  
PURA                     197   115  
CD41                     NaN   NaN  
CD41                     NaN   NaN  
Tubulin                  206   211  
Tubulin                  185   186  
Phalloidin               202   209  
Phalloidin               218    89  
TOMM20                   243   213  
TOMM20                   246   216  
DAPI                    2769  2579  
DAPI                    2877  2476  
========
batch8
Folder structure is invalid. Missing 8 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/FUSHomozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/TDP43/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/TBK1/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/WT/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/FUSRevertant/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/OPTN/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/FUSHeterozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/SNCA/Untreated/CD41
No bad files are found.
Total Sites:  145664
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           244   236   189          235          219   228   
G3BP1         rep2           234   219   184          218          216   214   
NONO          rep1           238   217   249          226          225   228   
NONO          rep2           246   233   247          224          223   226   
SQSTM1        rep1           156   125   184          141          150   167   
SQSTM1        rep2           183   180   172          169          184   159   
PSD95         rep1           237   235   218          209          249   222   
PSD95         rep2           247   249   250           17          248   249   
NEMO          rep1           248   250   246          250          248   247   
NEMO          rep2           231   248   247          247          248   240   
GM130         rep1           250   250   250          250          250   249   
GM130         rep2           250   250   250          250          250   249   
NCL           rep1           249   248   249          250          247   250   
NCL           rep2           250   249   248          250          240   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   249          250          249   250   
TDP43         rep1           236   242   245          244          134   238   
TDP43         rep2           243   248   237          248          213   235   
ANXA11        rep1           242   242   242          240          234   241   
ANXA11        rep2           242   241   233          237          228   241   
PEX14         rep1           246   250   247          246          146   239   
PEX14         rep2           241   242   246          247          203   240   
mitotracker   rep1           248   250   250          232          233   247   
mitotracker   rep2           246   241   236          246          247   243   
FMRP          rep1           233   212   182          214          200   224   
FMRP          rep2           212   197   172          200          192   205   
SON           rep1           246   238   250          249          244   240   
SON           rep2           250   243   249          242          244   225   
KIF5A         rep1           181   150   191          166          188   192   
KIF5A         rep2           188   206   173          187          219   173   
CLTC          rep1           231   237   219          228          248   225   
CLTC          rep2           247   250   250          181          250   250   
DCP1A         rep1           250   250   249          250          249   249   
DCP1A         rep2           231   250   249          247          246   249   
Calreticulin  rep1           250   248   250          250          250   250   
Calreticulin  rep2           250   249   248          250          250   250   
FUS           rep1           250   249   248          250          248   250   
FUS           rep2           249   249   248          250          250   250   
HNRNPA1       rep1           242   249   250          248          226   249   
HNRNPA1       rep2           246   247   245          249          222   249   
PML           rep1           248   239   224          189          219   250   
PML           rep2           242   218   197          231          234   246   
LAMP1         rep1           168   112   176          145          164   124   
LAMP1         rep2           132   162   209          184          128   165   
SNCA          rep1            95   123   101           92          159   115   
SNCA          rep2           133   174   173           99          101   117   
TIA1          rep1           226   202   195          213          233   230   
TIA1          rep2           205   197   215          161          240   214   
PURA          rep1           197   211   172          186          193   204   
PURA          rep2           186   182   149          182          196   182   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           235   198   244          225          240   220   
Tubulin       rep2           242   208   202          216          238   208   
Phalloidin    rep1           230   214   205          230          232   220   
Phalloidin    rep2           225   238   228          193          234   246   
TOMM20        rep1           239   246   250          247          232   246   
TOMM20        rep2           230   248   247          246          219   248   
DAPI          rep1          2977  2934  2902         2972         2910  2939   
DAPI          rep2          2958  2950  2887         2931         2925  2919   

             FUSHeterozygous  SNCA  
Marker                              
G3BP1                    239   204  
G3BP1                    222   228  
NONO                     202   240  
NONO                     207   242  
SQSTM1                   142   140  
SQSTM1                   164   167  
PSD95                    239   228  
PSD95                    224   246  
NEMO                     248   218  
NEMO                     249   234  
GM130                    250   250  
GM130                    249   248  
NCL                      250   249  
NCL                      248   247  
LSM14A                   250   248  
LSM14A                   250   246  
TDP43                    234   196  
TDP43                    236   140  
ANXA11                   237   236  
ANXA11                   232   247  
PEX14                    196   176  
PEX14                    191   141  
mitotracker              240   248  
mitotracker              245   245  
FMRP                     201   215  
FMRP                     196   229  
SON                      237   250  
SON                      221   247  
KIF5A                    169   185  
KIF5A                    202   199  
CLTC                     235   232  
CLTC                     226   246  
DCP1A                    249   220  
DCP1A                    249   235  
Calreticulin             250   250  
Calreticulin             249   249  
FUS                      250   250  
FUS                      250   250  
HNRNPA1                  241   230  
HNRNPA1                  227   231  
PML                      224   246  
PML                      236   225  
LAMP1                    142   128  
LAMP1                    152   147  
SNCA                     109   177  
SNCA                     115   158  
TIA1                     201   237  
TIA1                     213   226  
PURA                     195   200  
PURA                     182   200  
CD41                     NaN   NaN  
CD41                     NaN   NaN  
Tubulin                  209   216  
Tubulin                  213   210  
Phalloidin               212   215  
Phalloidin               197   232  
TOMM20                   193   107  
TOMM20                   219   212  
DAPI                    2916  2891  
DAPI                    2873  2874  
========
batch9
Folder structure is invalid. Missing 8 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/FUSHomozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/TDP43/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/TBK1/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/WT/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/FUSRevertant/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/OPTN/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/FUSHeterozygous/Untreated/CD41
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/SNCA/Untreated/CD41
No bad files are found.
Total Sites:  130955
df_reset (60, 9) colored_df (60, 9)
               Rep FUSHomozygous TDP43  TBK1 WT Untreated FUSRevertant  OPTN  \
Marker                                                                         
G3BP1         rep1           240   232   228          238          232   238   
G3BP1         rep2           244   246   232          240          231   237   
NONO          rep1           250   223   246            0          224   250   
NONO          rep2           250   228   206          250          230   248   
SQSTM1        rep1           157   179   114          148          186   138   
SQSTM1        rep2           156   160   151          159          170   148   
PSD95         rep1           249   245   223          249          237   237   
PSD95         rep2           247   246   249          245          248   230   
NEMO          rep1           250   249   249          248          249   248   
NEMO          rep2           249   250   249          248          246   250   
GM130         rep1           249   250   249          250          250     0   
GM130         rep2           250   249   250          250          250   249   
NCL           rep1           250   250   250          249          248   250   
NCL           rep2           250   249   250          246          244   250   
LSM14A        rep1           250   250   250          250          250   250   
LSM14A        rep2           250   250   250          249          250   250   
TDP43         rep1            92   212    98          221           26   147   
TDP43         rep2           241   192   158          218          174   171   
ANXA11        rep1            58   178    48          156          132   138   
ANXA11        rep2            46   115   100          149           56    51   
PEX14         rep1           112   173   138          185          105   131   
PEX14         rep2            91   147   148          216          140   144   
mitotracker   rep1           218   224   191          250          199   200   
mitotracker   rep2           237   242   242          247          101   237   
FMRP          rep1           237   227   224          224          228   241   
FMRP          rep2           241   243   225          198          218   233   
SON           rep1           249   225   248            4          242   249   
SON           rep2           250   227   206          249          244   247   
KIF5A         rep1           189   213   146          192          219   202   
KIF5A         rep2           189   193   185          189          220   187   
CLTC          rep1           250   249   222          250          240   237   
CLTC          rep2           249   244   249          244          250   236   
DCP1A         rep1           250   248   250          250          250   250   
DCP1A         rep2           250   250   250          250          248   250   
Calreticulin  rep1           247   250   249          250          247     0   
Calreticulin  rep2           250   248   250          248          249   249   
FUS           rep1           248   250   250          249          250   250   
FUS           rep2           249   250   250          247          250   250   
HNRNPA1       rep1           247   248   249          244          250   247   
HNRNPA1       rep2           248   249   249          241          229   250   
PML           rep1            88   222   101          243           25   159   
PML           rep2           242   195   160          245          178   158   
LAMP1         rep1            47   217   118          129          154   200   
LAMP1         rep2            43   118   113          122           89    92   
SNCA          rep1            69   111    97          103           71   100   
SNCA          rep2            49    77    95          137          108   103   
TIA1          rep1            97   166   101          154          177   188   
TIA1          rep2           159   170   177          132          100   230   
PURA          rep1           191   203   195          187          196   201   
PURA          rep2           190   180   200          214          200   217   
CD41          rep1           NaN   NaN   NaN          NaN          NaN   NaN   
CD41          rep2           NaN   NaN   NaN          NaN          NaN   NaN   
Tubulin       rep1           240   235   188          242          243   237   
Tubulin       rep2           196   175   201          212          237   222   
Phalloidin    rep1           208   234   205          220          229   231   
Phalloidin    rep2           222   233   214          190          240   230   
TOMM20        rep1           221   227   195          248          199   197   
TOMM20        rep2           244   249   248          249          106   248   
DAPI          rep1          2502  2843  2485         2661         2524  2493   
DAPI          rep2          2613  2702  2626         2861         2551  2665   

             FUSHeterozygous  SNCA  
Marker                              
G3BP1                    242   163  
G3BP1                    250   215  
NONO                     241   211  
NONO                     240   228  
SQSTM1                   166   179  
SQSTM1                   182   167  
PSD95                    249   229  
PSD95                    245   213  
NEMO                     249   238  
NEMO                     249   248  
GM130                    250   249  
GM130                    250   248  
NCL                      250   247  
NCL                      246   243  
LSM14A                   250   249  
LSM14A                   250   245  
TDP43                     47    55  
TDP43                    120   190  
ANXA11                    70    84  
ANXA11                    57   119  
PEX14                     74   157  
PEX14                     90    98  
mitotracker              249   138  
mitotracker               85   109  
FMRP                     236   159  
FMRP                     246   208  
SON                      246   219  
SON                      245   230  
KIF5A                    202   206  
KIF5A                    228   202  
CLTC                     250   238  
CLTC                     250   240  
DCP1A                    250   232  
DCP1A                    250   248  
Calreticulin             247   248  
Calreticulin             248   250  
FUS                      250   248  
FUS                      250   250  
HNRNPA1                  235   248  
HNRNPA1                  235   231  
PML                       47    45  
PML                      155   211  
LAMP1                     60    57  
LAMP1                     50    91  
SNCA                      61   119  
SNCA                      74   128  
TIA1                     158   113  
TIA1                      71    95  
PURA                     213   134  
PURA                     183   178  
CD41                     NaN   NaN  
CD41                     NaN   NaN  
Tubulin                  244   200  
Tubulin                  246   220  
Phalloidin               217   216  
Phalloidin               221   227  
TOMM20                   232   126  
TOMM20                    79   101  
DAPI                    2471  2307  
DAPI                    2418  2570  
========
====================

Difference between Raw and Processed¶

In [11]:
display_diff(batches, raws, procs, PLOT_PATH)
batch1
========
batch2
========
batch3
========
batch7
========
batch8
========
batch9
========

Variance in each batch (of processed files)¶

In [12]:
for batch in batches:
    with contextlib.redirect_stdout(io.StringIO()):
        var = sample_and_calc_variance(root_directory_proc, batch, 
                                       sample_size_per_markers=200, num_markers=30)
    print(f'{batch} var: ',var)
batch1 var:  0.05036649880196232
batch2 var:  0.05258532665007061
batch3 var:  0.0521106516308303
batch7 var:  0.049995200746055984
batch8 var:  0.05076662719671258
batch9 var:  0.05090705293553934

Preprocessing Filtering qc¶

By order of filtering

1. % site survival after Brenner on DAPI channel¶

Percentage out of the total sites

In [13]:
dapi_filter_by_brenner = show_site_survival_dapi_brenner(df_dapi,
                                                         batches, 
                                                         new_d8_line_colors, 
                                                         new_d8_panels, 
                                                         new_d8_reps,
                                                         vmax=250,
                                                         to_ignore={'cell_line_cond':'SNCA','batch':['batch1','batch2','batch3']}
                                                        )

2. % Site survival after Cellpose¶

Percentage out of the sites that passed the previous filter. In parenthesis are absolute values.

A site will be filtered out if Cellpose found 0 cells in it.

In [14]:
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(df_dapi, 
                                                           batches, 
                                                           dapi_filter_by_brenner, 
                                                           new_d8_line_colors, 
                                                           new_d8_panels, 
                                                           new_d8_reps,
                                                           figsize=(7,5), 
                                                           to_ignore={'cell_line_cond':['SNCA'],'batch':['batch1','batch2','batch3']}
                                                          )

3. % Site survival by tiling¶

Percentage out of the sites that passed the previous filter. In parenthesis are absolute values.

A site will be filtered out if after tiling, no tile is containing at least one whole cell that Cellpose detected.

In [15]:
dapi_filter_by_tiling = show_site_survival_dapi_tiling(df_dapi, 
                                                      batches, 
                                                      dapi_filter_by_cellpose, 
                                                      new_d8_line_colors, 
                                                      new_d8_panels, 
                                                      new_d8_reps,
                                                      figsize=(7,5),
                                                      to_ignore={'cell_line_cond':['SNCA'],'batch':['batch1','batch2','batch3']}
                                                     )

4. % Site survival after Brenner on target channel¶

Percentage out of the sites that passed the previous filter. In parenthesis are absolute values (if different than the percentages).

In [16]:
show_site_survival_target_brenner(df_dapi, 
                                  df_target, 
                                  dapi_filter_by_tiling, 
                                  new_d8_markers, 
                                  figsize=(7,8))

Note that Batch 10 should be removed! ¶

also check PD line ¶

what about marker CD41? ¶

Statistics About the Processed Files¶

In [17]:
stats = ['n_valid_tiles','site_whole_cells_counts_sum','site_cell_count','site_cell_count_sum']
total_sum = calc_total_sums(df_target, df_dapi, stats, new_d8_markers)

Total tiles - without CD41 marker¶

In [18]:
total_sum[~total_sum.marker.str.contains('CD41', regex=True)].n_valid_tiles.sum()
Out[18]:
7870227
In [19]:
## Total tiles in wt lines
total_sum[(~total_sum.marker.str.contains('CD41', regex=True))&
         (total_sum.cell_line_cond.isin(['WT stress', 'WT Untreated']))].n_valid_tiles.sum()
Out[19]:
1253833
In [20]:
## Total tiles in untreated lines
total_sum[(~total_sum.marker.str.contains('CD41', regex=True))&
         ((~total_sum.cell_line_cond.str.contains('WT')) | (total_sum.cell_line_cond=='WT Untreated'))].n_valid_tiles.sum()
Out[20]:
7870227

Total whole nuclei in tiles¶

In [21]:
total_sum[total_sum.marker =='DAPI'].site_whole_cells_counts_sum.sum()
Out[21]:
1536287.0

Total nuclei in sites¶

In [22]:
total_sum[total_sum.marker =='DAPI'].site_cell_count.sum()
Out[22]:
6981999.0
In [23]:
show_total_sum_tables(total_sum)
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch1
count 5.740000e+02 574.000000 574.000000 5.740000e+02
mean 2.228162e+03 22.281620 1354.630662 5.722444e+03
std 7.139471e+02 7.139471 510.552754 1.816201e+03
min 4.700000e+02 4.700000 263.000000 1.131000e+03
25% 1.658250e+03 16.582500 1026.000000 4.587000e+03
50% 2.262000e+03 22.620000 1303.000000 5.642000e+03
75% 2.762250e+03 27.622500 1710.500000 7.051000e+03
max 3.763000e+03 37.630000 2646.000000 1.027900e+04
sum 1.278965e+06 NaN 777558.000000 3.284683e+06
expected_count 4.500000e+02 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch10
count 656.000000 656.000000 656.000000 6.560000e+02
mean 1508.341463 15.083415 978.745427 4.272267e+03
std 1028.037629 10.280376 703.474071 2.863482e+03
min 6.000000 0.060000 0.000000 1.800000e+01
25% 522.750000 5.227500 367.000000 1.632500e+03
50% 1446.500000 14.465000 910.000000 4.153000e+03
75% 2384.250000 23.842500 1508.250000 6.633000e+03
max 3646.000000 36.460000 2838.000000 1.107800e+04
sum 989472.000000 NaN 642057.000000 2.802607e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch2
count 572.000000 572.000000 572.000000 5.720000e+02
mean 1581.059441 15.810594 849.428322 3.946177e+03
std 474.353423 4.743534 257.416220 1.051369e+03
min 183.000000 1.830000 67.000000 4.770000e+02
25% 1269.250000 12.692500 672.000000 3.237000e+03
50% 1658.500000 16.585000 878.500000 4.045000e+03
75% 1926.000000 19.260000 1048.500000 4.695000e+03
max 2511.000000 25.110000 1670.000000 6.325000e+03
sum 904366.000000 NaN 485873.000000 2.257213e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch3
count 574.000000 574.000000 574.000000 5.740000e+02
mean 1651.623693 16.516237 862.040070 4.084611e+03
std 509.486609 5.094866 269.189087 1.110487e+03
min 337.000000 3.370000 163.000000 7.830000e+02
25% 1309.250000 13.092500 690.000000 3.449500e+03
50% 1719.500000 17.195000 860.000000 4.158500e+03
75% 2003.000000 20.030000 1030.000000 4.891500e+03
max 2802.000000 28.020000 1696.000000 6.807000e+03
sum 948032.000000 NaN 494811.000000 2.344567e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch7
count 6.530000e+02 653.000000 653.000000 6.530000e+02
mean 1.882706e+03 18.827060 1182.052067 5.608830e+03
std 7.625794e+02 7.625794 480.095645 1.970072e+03
min 5.000000e+00 0.050000 2.000000 1.500000e+01
25% 1.394000e+03 13.940000 874.000000 4.565000e+03
50% 2.019000e+03 20.190000 1194.000000 5.953000e+03
75% 2.398000e+03 23.980000 1456.000000 6.769000e+03
max 3.571000e+03 35.710000 2472.000000 9.793000e+03
sum 1.229407e+06 NaN 771880.000000 3.662566e+06
expected_count 4.500000e+02 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch8
count 6.560000e+02 656.000000 656.000000 6.560000e+02
mean 2.176212e+03 21.762119 1302.580793 5.994723e+03
std 6.821452e+02 6.821452 362.419717 1.388579e+03
min 1.900000e+02 1.900000 130.000000 4.830000e+02
25% 1.832000e+03 18.320000 1056.750000 5.054250e+03
50% 2.281500e+03 22.815000 1299.000000 6.037000e+03
75% 2.681000e+03 26.810000 1573.000000 7.120750e+03
max 3.458000e+03 34.580000 2327.000000 9.124000e+03
sum 1.427595e+06 NaN 854493.000000 3.932538e+06
expected_count 4.500000e+02 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch9
count 6.530000e+02 653.000000 653.000000 6.530000e+02
mean 2.018032e+03 20.180322 1220.004594 5.591424e+03
std 8.400946e+02 8.400946 451.891439 1.969414e+03
min 0.000000e+00 0.000000 8.000000 1.100000e+01
25% 1.304000e+03 13.040000 933.000000 4.382000e+03
50% 2.221000e+03 22.210000 1266.000000 6.016000e+03
75% 2.664000e+03 26.640000 1546.000000 6.982000e+03
max 3.475000e+03 34.750000 2128.000000 9.442000e+03
sum 1.317775e+06 NaN 796663.000000 3.651200e+06
expected_count 4.500000e+02 450.000000 450.000000 4.500000e+02
n valid tiles % valid tiles site_whole_cells_counts_sum site_cell_count
All batches
count 4.338000e+03 4338.000000 4.338000e+03 4.338000e+03
mean 1.866208e+03 18.662084 1.111880e+03 5.056564e+03
std 7.913609e+02 7.913609 4.984885e+02 2.025304e+03
min 0.000000e+00 0.000000 0.000000e+00 1.100000e+01
25% 1.331000e+03 13.310000 7.680000e+02 3.752250e+03
50% 1.933500e+03 19.335000 1.088500e+03 5.102500e+03
75% 2.437000e+03 24.370000 1.426000e+03 6.513500e+03
max 3.763000e+03 37.630000 2.838000e+03 1.107800e+04
sum 8.095612e+06 NaN 4.823335e+06 2.193537e+07
expected_count 4.500000e+02 450.000000 4.500000e+02 4.500000e+02

Show Total Tile Counts¶

For each batch, cell line, replicate and markerTotal number of tiles

First, we look at all cell lines togther:¶

In [24]:
show_total_valid_tiles_per_marker_and_batch(total_sum, vmax=10000)

Separating into cell lines & batches:¶

In [25]:
to_heatmap = total_sum.rename(columns={'n_valid_tiles':'index'})
plot_filtering_heatmap(to_heatmap, 
                       extra_index='marker', 
                       vmin=None, vmax=None,
                       xlabel = 'Total number of tiles', 
                       show_sum=True, 
                       figsize=(8,28))
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)

Show Total Whole Cell Counts¶

For each batch, cell line, replicate and markerTotal number of tiles

In [26]:
to_heatmap = total_sum.rename(columns={'site_whole_cells_counts_sum':'index'})
plot_filtering_heatmap(to_heatmap, #to_heatmap[to_heatmap.batch=='batch7'], 
                       extra_index='marker', 
                       vmin=None, vmax=None,
                       xlabel = 'Total number of whole cells', 
                       show_sum=True, 
                       figsize=(8,28))
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)

Show Cell Count Statistics per Batch¶

In [27]:
df_no_empty_sites = df_dapi[df_dapi.n_valid_tiles !=0]

plot_cell_count(df_no_empty_sites, 
                new_d8_lines_order, 
                new_d8_custom_palette, 
                y='site_cell_count_sum', 
                title='Cell Count Average per Site (from tiles)')

plot_cell_count(df_no_empty_sites, 
                new_d8_lines_order, 
                new_d8_custom_palette, 
                y='site_whole_cells_counts_sum',
                title='Whole Cell Count Average per Site')

plot_cell_count(df_no_empty_sites, 
                new_d8_lines_order, 
                new_d8_custom_palette, 
                y='site_cell_count',
                title='Cellpose Cell Count Average per Site')
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:623: UserWarning: The palette list has more values (10) than needed (8), which may not be intended.
  c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,

Show Tiles per Site Statistics¶

In [28]:
df_dapi.groupby(['cell_line_cond']).n_valid_tiles.mean()
Out[28]:
cell_line_cond
FUSHeterozygous    7.729166
FUSHomozygous      8.147912
FUSRevertant       7.297871
OPTN               8.778378
SNCA               6.696576
TBK1               7.475947
TDP43              9.103717
WT Untreated       9.866227
Name: n_valid_tiles, dtype: float64
In [29]:
df_dapi[['site_cell_count']].mean()
Out[29]:
site_cell_count    22.317758
dtype: float64
In [30]:
plot_catplot(df_dapi, 
             new_d8_custom_palette,
             new_d8_reps, 
             x='n_valid_tiles', 
             x_title='valid tiles count', 
             batches=batches)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1063: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep']

Show Mean of cell count in valid tiles¶

In [31]:
plot_hm_of_mean_cell_count_per_tile(df_dapi, split_by='rep', rows='cell_line', columns='panel', vmax=3)
In [32]:
df_dapi[['cells_count_in_valid_tiles_mean']].mean()
Out[32]:
cells_count_in_valid_tiles_mean    1.735626
dtype: float64
In [33]:
df_dapi[['site_cell_count']].mean()
Out[33]:
site_cell_count    22.317758
dtype: float64

Assessing Staining Reproducibility and Outliers¶

In [34]:
# for batch in batches:
#     print(batch)
#     #batch_num = batch.replace('batch',"")
#     run_calc_hist_new(batch,new_d8_cell_lines_for_disp, new_d8_markers, root_directory_raw, root_directory_proc,
#                            hist_sample=10,sample_size_per_markers=200, ncols=7, nrows=4)
#     print("="*30)
In [35]:
# # save notebook as HTML ( the HTML will be saved in the same folder the original script is)
# display(Javascript('IPython.notebook.save_checkpoint();'))
# os.system(f'jupyter nbconvert --to html {NOVA_HOME}/tools/preprocessing_tools/qc_reports/qc_report_new_indi_d8_Opera.ipynb --output {NOVA_HOME}/manuscript/preprocessing_qc_reports/ManuscriptFinalData/qc_report_new_indi_d8_Opera.html')
In [ ]: